options(repos = c(CRAN = "https://cran.rstudio.com"))
library(tidyverse)
library(sf)
library(maps)  
library(plotly)
library(ggplot2)
library(ggmap)
library(ggspatial)

crime_and_incarceration_by_state <- read.csv("FinalProject/data/crime_and_incarceration_by_state.csv")
unemployment_county <- read.csv("FinalProject/data/unemployment_county.csv")
state_polygons <- st_read("FinalProject/data/tl_2019_us_state/tl_2019_us_state.shp")
## Reading layer `tl_2019_us_state' from data source 
##   `/home/rstudio/FinalProject/data/tl_2019_us_state/tl_2019_us_state.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83
# crime_and_incarceration_by_state
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  filter(!(jurisdiction %in% c("ALASKA", "HAWAII", "FEDERAL")))

crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  rename(STUSPS = jurisdiction)

crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  rename(Year = year)

crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  filter(Year >= 2007 & Year <= 2014)

state_abbreviations <- state.abb
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  mutate(STUSPS = tolower(STUSPS),
         STUSPS = state_abbreviations[match(STUSPS, tolower(state.name))])

crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  mutate(crime_rate = property_crime_total / state_population)

crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
  select(STUSPS, Year, crime_rate)
head(crime_and_incarceration_by_state)
##   STUSPS Year crime_rate
## 1     AL 2007 0.03977699
## 2     AZ 2007 0.04532562
## 3     AR 2007 0.03955486
## 4     CA 2007 0.03043535
## 5     CO 2007 0.02999230
## 6     CT 2007 0.02470599
# unemployment_county
unemployment_county <- unemployment_county %>%
  rename(STUSPS = State)

# Step 2: Remove rows for ALASKA, HAWAII, FEDERAL
unemployment_county <- unemployment_county %>%
  filter(STUSPS %in% c("AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"))

# Step 3: Filter years: 2007-2014
unemployment_county <- unemployment_county %>%
  filter(Year >= 2007, Year <= 2014)

unemployment_county <- unemployment_county %>%
  group_by(STUSPS, Year) %>%
  summarize(Avg_Unemployment_Rate = mean(Unemployment.Rate, na.rm = TRUE))



print(unemployment_county)
## # A tibble: 384 × 3
## # Groups:   STUSPS [48]
##    STUSPS  Year Avg_Unemployment_Rate
##    <chr>  <int>                 <dbl>
##  1 AL      2007                  4.9 
##  2 AL      2008                  6.98
##  3 AL      2009                 13.2 
##  4 AL      2010                 12.3 
##  5 AL      2011                 11.2 
##  6 AL      2012                  9.25
##  7 AL      2013                  8.49
##  8 AL      2014                  7.90
##  9 AR      2007                  6.05
## 10 AR      2008                  6.21
## # ℹ 374 more rows
# state_polygons
state_polygons <- state_polygons %>%
  filter(!(STUSPS %in% c("AK", "AS", "MP", "PR", "VI", "HI", "GU")))

state_polygons <- select(state_polygons, REGION, STUSPS, geometry)

str(state_polygons)
## Classes 'sf' and 'data.frame':   49 obs. of  3 variables:
##  $ REGION  : chr  "3" "3" "2" "2" ...
##  $ STUSPS  : chr  "WV" "FL" "IL" "MN" ...
##  $ geometry:sfc_MULTIPOLYGON of length 49; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:35204, 1:2] -81.7 -81.7 -81.7 -81.7 -81.7 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA
##   ..- attr(*, "names")= chr [1:2] "REGION" "STUSPS"
##########
combined_data <- right_join(state_polygons, unemployment_county, by = "STUSPS") %>%
  right_join(., crime_and_incarceration_by_state, by = c("STUSPS", "Year"))

combined_data <- combined_data %>%
  select(REGION, STUSPS,Year, Avg_Unemployment_Rate, crime_rate, geometry)


missing_values <- which(is.na(combined_data))
print(missing_values)
## integer(0)
saveRDS(combined_data, "combined_dataset.rds")



#######
selected_states <- c("IL", "CA", "ID", "IN")

time_series_data <- combined_data %>%
  filter(STUSPS %in% selected_states)

unemployment_plot <- plot_ly(
  data = time_series_data,
  x = ~Year,
  y = ~Avg_Unemployment_Rate,
  color = ~STUSPS) %>%
  add_lines() %>% 
  layout(title = "Unemployment Rate Over Time",
         xaxis = list(title = "Year"),
         yaxis = list(title = "Average Unemployment Rate"))


spatial_map <- ggplot() +
  geom_sf(data = combined_data %>% filter(Year == 2014),
          aes(fill = Avg_Unemployment_Rate),
          color = "white",
          size = 0.2) +
  scale_fill_viridis_c() +
  labs(title = "Unemployment Rate Spatial Map (2014)",
       fill = "Average Unemployment Rate") +
  ggsn::scalebar(data = combined_data, dist = 1000, dist_unit = "km", location = "bottomleft", transform = TRUE, model = "WGS84") +
  ggspatial::annotation_north_arrow(location = "br", which_north = "true", style = north_arrow_fancy_orienteering) +
  xlab("Longitude") + ylab("Latitude") +
  theme_minimal()

spatial_map_region_1 <- ggplot() +
  geom_sf(data = combined_data %>% filter(Year == 2014, REGION == 1),  # Filter for Region 1
          aes(fill = Avg_Unemployment_Rate),
          color = "white",
          size = 0.2) +
  scale_fill_viridis_c() +
  labs(title = "Unemployment Rate Spatial Map for Region 1 (2014)",
       fill = "Average Unemployment Rate") +
  ggsn::scalebar(data = combined_data, dist = 200, dist_unit = "km", location = "bottomleft", transform = TRUE, model = "WGS84") +
  ggspatial::annotation_north_arrow(location = "br", which_north = "true", style = north_arrow_fancy_orienteering) +
  xlab("Longitude") + ylab("Latitude") +
  theme_minimal()

scatter_plot <- plot_ly(
  data = combined_data %>% filter(Year == 2014),
  x = ~crime_rate,
  y = ~Avg_Unemployment_Rate,
  color = ~REGION) %>% 
  add_markers(text = ~paste("State: ", STUSPS)) %>%
  layout(title = "Scatter Plot for Unemployment Rate vs Crime Rate (2014)",
         xaxis = list(title = "Crime Rate"),
         yaxis = list(title = "Average Unemployment Rate"))


spatial_map

spatial_map_region_1

scatter_plot
unemployment_plot